import mmcv
import matplotlib.pyplot as plt
from fastcore.basics import *
from fastai.vision.all import *
from fastai.torch_basics import *
import warnings
warnings.filterwarnings("ignore")
import kornia
from kornia.constants import Resample
from kornia.color import *
from kornia import augmentation as K
import kornia.augmentation as F
import kornia.augmentation.random_generator as rg
from torchvision.transforms import functional as tvF
from torchvision.transforms import transforms
from torchvision.transforms import PILToTensor
from functools import partial
from timm.models.layers import trunc_normal_, DropPath
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.models.vision_transformer import _cfg
from einops import rearrange
from timm.models.registry import register_model
set_seed(105)

# --- dataset overview ---
# Root and per-modality folders of the RGB-D salient object detection set.
rootPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/')
rgbPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/LR/')
depPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/')
gtPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/GT/')

# Collect the RGB / depth / ground-truth image files (fastai helper).
rgbFiles = get_image_files(rgbPath)
depFiles = get_image_files(depPath)
lblFiles = get_image_files(gtPath)
# First entries observed in the notebook run:
#   rgbFiles[0] -> .../LR/001477_left_1_ori.jpg
#   depFiles[0] -> .../depth/10_01-16-36_0_Depth.png
#   lblFiles[0] -> .../GT/9_07-38-26_0_GT.png

to_tensor = transforms.ToTensor()
to_pil = transforms.ToPILImage()

# Load one sample per modality and convert each to a tensor.
rgbImage = Image.open(rgbFiles[0])
lblImage = Image.open(lblFiles[0])
depImage = Image.open(depFiles[0])
rgbTensor = image2tensor(rgbImage)
lblTensor = image2tensor(lblImage)
depTensor = image2tensor(depImage)
# Notebook inspection of the depth sample:
#   depTensor.shape == torch.Size([3, 480, 640])
#   torch.unique(depTensor) spans all 256 uint8 values 0..255.
# depFiles[0] -> Path('.../RGBDcollection/depth/10_01-16-36_0_Depth.png')
import cv2

# Re-read the same depth file with OpenCV; -1 == cv2.IMREAD_UNCHANGED.
im = cv2.imread('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png', -1)
# cv2.imshow("test",im)
# NOTE: reading this depth image with OpenCV yields a 3-channel array
# (the channels are identical), while PIL yields a single channel.
in_ = np.array(im, dtype=np.float32)
# Notebook inspection:
#   in_.shape == (480, 640, 3)
#   len(np.unique(in_)) == 256  (full 0..255 range, stored as float32)
def Normalization(image):
    """Normalize a BGR image (0..255) to ImageNet statistics.

    The input is expected to come from cv2.imread, i.e. channel order is
    BGR with values in 0..255. The channels are flipped to RGB, scaled
    to [0, 1], then standardized with the ImageNet per-channel mean/std.

    Returns a new float array of the same (H, W, 3) shape.
    """
    # ::-1 on the last axis reverses the channels: BGR -> RGB.
    in_ = image[:, :, ::-1]
    # Division creates a fresh float array, so the in-place ops below
    # never touch the caller's data.
    in_ = in_ / 255.0
    in_ -= np.array((0.485, 0.456, 0.406))
    in_ /= np.array((0.229, 0.224, 0.225))
    return in_

# Scratch check of numpy in-place arithmetic (separate notebook cell):
temp = np.array((1, 1, 1), dtype=np.float32)
temp2 = np.array((2, 2, 2), dtype=np.float32)
temp -= temp2
temp /= temp2
# temp -> array([-0.5, -0.5, -0.5], dtype=float32)
# The original authors read both the RGB image and the depth image with
# this one helper (cv2 returns an (H, W, 3) BGR array for both).
def load_image(path, image_size):
    """Read an image with OpenCV, resize it to (image_size, image_size),
    and normalize it to ImageNet statistics (see Normalization).

    Returns an (image_size, image_size, 3) float RGB array.
    Raises FileNotFoundError if OpenCV cannot read the file.
    """
    # str() so pathlib.Path arguments work with cv2.imread.
    im = cv2.imread(str(path))
    if im is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise FileNotFoundError(f'cv2 could not read image: {path}')
    in_ = np.array(im, dtype=np.float32)
    in_ = cv2.resize(in_, (image_size, image_size))
    in_ = Normalization(in_)
    return in_
def load_sal_label(path, image_size):
    """Read a saliency ground-truth mask as grayscale, resize it to
    (image_size, image_size), and scale values to [0, 1].

    Returns an (image_size, image_size, 1) float32-derived array.
    Raises FileNotFoundError if OpenCV cannot read the file.
    """
    im = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if im is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise FileNotFoundError(f'cv2 could not read label: {path}')
    label = np.array(im, dtype=np.float32)
    label = cv2.resize(label, (image_size, image_size))
    label = label / 255.0
    # Append a trailing channel axis: (H, W) -> (H, W, 1).
    label = label[..., np.newaxis]
    return label

# rgbFiles[0] -> Path('.../RGBDcollection/LR/001477_left_1_ori.jpg')
# --- slicing issues of three dimensions ---
# Small demo of how integer indices vs. slices behave on a 3-D array:
# an integer index drops its axis, a slice (even length-1) keeps it.
import numpy as np

b = np.array([[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
              [[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]],
              [[25, 26, 27, 28], [29, 30, 31, 32], [33, 34, 35, 36]],
              ])
print(f'b is {b}')
print(b.shape)  # (3, 3, 4)

# ::-1 on the last axis reverses each innermost row; shape is unchanged.
print("b[:, :, ::-1]", b[:, :, ::-1], b[:, :, ::-1].shape)

# Axis 0: integer index -> (3, 4); slice keeps the axis -> (2, 3, 4).
print("b[0, ::],b[1, ::],b[-1, ::],b[0:2, ::]")
print("b[0, ::]", b[0, ::], b[0, ::].shape)
print("b[1, ::]", b[1, ::], b[1, ::].shape)
print("b[-1, ::]", b[-1, ::], b[-1, ::].shape)
print("b[0:2, ::]", b[0:2, ::], b[0:2, ::].shape)

# Axis 1 with slices: the axis survives, possibly shortened.
print("b[:, 0:],b[:, 1:],b[:, -1:],b[:, 0:2:]")
print("b[:, 0:]", b[:, 0:], b[:, 0:].shape)
print("b[:, 1:]", b[:, 1:], b[:, 1:].shape)
print("b[:, -1:]", b[:, -1:], b[:, -1:].shape)
print("b[:, 0:2:]", b[:, 0:2:], b[:, 0:2:].shape)

# Axis 1 with integer indices: the axis is dropped -> (3, 4).
print("b[::, 0],b[::, 1],b[::, -1],b[::, 0:2:]")
print("b[::, 0]", b[::, 0], b[::, 0].shape)
print("b[::, 1]", b[::, 1], b[::, 1].shape)
print("b[::, -1]", b[::, -1], b[::, -1].shape)
print("b[::, 0:2:]", b[::, 0:2:], b[::, 0:2].shape)

# Last axis with integer indices: the axis is dropped -> (3, 3).
print("b[:,:, 0],b[:,:, 1],b[:,:, -1],b[:,:, 0:2:]")
print("b[:, :, 0]", b[:, :, 0], b[:, :, 0].shape)
print("b[:, :, 1]", b[:, :, 1], b[:, :, 1].shape)
print("b[:, :, -1]", b[:, :, -1], b[:, :, -1].shape)

# Notebook check of the loader on the depth sample:
# load_image('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png', 512).shape
#   -> (512, 512, 3)
# --- add some fog ---
# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py
def plasma_fractal(mapsize=256, wibbledecay=3):
    """
    Generate a heightmap using diamond-square algorithm.
    Return square 2d array, side length 'mapsize', of floats in range 0-1
    (normalized at the end).
    'mapsize' must be a power of two.
    """
    assert (mapsize & (mapsize - 1) == 0), 'mapsize must be a power of two'
    # np.float64 explicitly: the np.float_ alias was removed in NumPy 2.0.
    maparray = np.empty((mapsize, mapsize), dtype=np.float64)
    maparray[0, 0] = 0
    stepsize = mapsize
    wibble = 100

    def wibbledmean(array):
        # Mean of the four accumulated neighbours plus decaying noise.
        return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape)

    def fillsquares():
        """For each square of points stepsize apart,
        calculate middle value as mean of points + wibble"""
        cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0)
        squareaccum += np.roll(squareaccum, shift=-1, axis=1)
        maparray[stepsize // 2:mapsize:stepsize,
                 stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum)

    def filldiamonds():
        """For each diamond of points stepsize apart,
        calculate middle value as mean of points + wibble"""
        mapsize = maparray.shape[0]
        drgrid = maparray[stepsize // 2:mapsize:stepsize, stepsize // 2:mapsize:stepsize]
        ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        ldrsum = drgrid + np.roll(drgrid, 1, axis=0)
        lulsum = ulgrid + np.roll(ulgrid, -1, axis=1)
        ltsum = ldrsum + lulsum
        maparray[0:mapsize:stepsize, stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum)
        tdrsum = drgrid + np.roll(drgrid, 1, axis=1)
        tulsum = ulgrid + np.roll(ulgrid, -1, axis=0)
        ttsum = tdrsum + tulsum
        maparray[stepsize // 2:mapsize:stepsize, 0:mapsize:stepsize] = wibbledmean(ttsum)

    while stepsize >= 2:
        fillsquares()
        filldiamonds()
        stepsize //= 2
        wibble /= wibbledecay

    maparray -= maparray.min()
    return maparray / maparray.max()


def fog(x, severity=1):
    """Blend plasma-fractal fog into an image.

    x: image with values in 0..255, convertible via np.array. HWC layout
       is expected; a CHW array (e.g. a torch image tensor) is transposed
       to HWC first. The original ImageNet-C version hard-coded a
       224x224 fog crop and crashed on any other size/layout (see the
       recorded traceback below).
    severity: 1..5, stronger fog for larger values.

    Returns a float HWC (or HW for grayscale) array scaled back to 0..255.
    """
    c = [(1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)][severity - 1]
    x = np.array(x) / 255.
    # Heuristic CHW -> HWC: leading dim looks like channels, trailing one
    # does not. (Ambiguous for 1- or 3-row HWC images -- acceptable here.)
    if x.ndim == 3 and x.shape[0] in (1, 3) and x.shape[2] not in (1, 3):
        x = x.transpose(1, 2, 0)
    h, w = x.shape[:2]
    # plasma_fractal needs a power-of-two side covering max(h, w).
    mapsize = 2 ** int(np.ceil(np.log2(max(h, w, 2))))
    max_val = x.max()
    layer = c[0] * plasma_fractal(mapsize=mapsize, wibbledecay=c[1])[:h, :w]
    if x.ndim == 3:
        layer = layer[..., np.newaxis]  # broadcast over channels
    x += layer
    return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255

# Notebook trace: the original fog(rgbTensor, severity=3) on a CHW tensor
# of shape (3, 496, 869) raised
#   ValueError: operands could not be broadcast together with shapes
#   (3,496,869) (224,224,1)
# which motivated the size- and layout-aware rewrite above. The stray
# set_trace() debugging call has been removed as well.